/*    Copyright 2010 Tobias Marschall
 *
 *    This file is part of MoSDi.
 *
 *    MoSDi is free software: you can redistribute it and/or modify
 *    it under the terms of the GNU General Public License as published by
 *    the Free Software Foundation, either version 3 of the License, or
 *    (at your option) any later version.
 *
 *    MoSDi is distributed in the hope that it will be useful,
 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *    GNU General Public License for more details.
 *
 *    You should have received a copy of the GNU General Public License
 *    along with MoSDi.  If not, see <http://www.gnu.org/licenses/>.
 */

package mosdi.fa;

import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.PrintWriter;
import java.util.Arrays;
import java.util.List;
import java.util.Random;
import java.util.StringTokenizer;

import mosdi.util.Log;

public class IIDTextModel extends FiniteMemoryTextModel {

	private int alphabetSize;
	private double[] charDist;

	/** Constructs uniformly distributed i.i.d. model. */
	public IIDTextModel(int alphabetSize) {
		this.alphabetSize = alphabetSize;
		charDist = new double[alphabetSize];
		Arrays.fill(charDist, 1.0/(double)alphabetSize);
	}

	public IIDTextModel(int alphabetSize, double[] characterDistribution) {
		if (characterDistribution.length != alphabetSize) throw new IllegalArgumentException("Distribution has wrong length.");
		this.alphabetSize = alphabetSize;
		double sum = 0.0;
		for (int i=0; i<characterDistribution.length; ++i) sum+=characterDistribution[i];
		this.charDist = new double[characterDistribution.length];
		for (int i=0; i<characterDistribution.length; ++i) this.charDist[i] = characterDistribution[i]/sum;
	}

	/** Estimate distribution from a given text. */
	public IIDTextModel(Alphabet alphabet, String background) {
		this(alphabet.size(), alphabet.buildIndexArray(background));
	}

	public IIDTextModel(int alphabetSize, int[] background) {
		this(alphabetSize, background, 0.0);
	}

	/** Estimate distribution from a given text. */
	public IIDTextModel(int alphabetSize, int[] background, double pseudoCounts) {
		this.alphabetSize = alphabetSize;
		double[] freq = new double[alphabetSize];
		Arrays.fill(freq, pseudoCounts);
		double sum = pseudoCounts*alphabetSize;
		for (int i=0; i<background.length; ++i) {
			int c = background[i];
			if (c>=0) {
				freq[c]+=1;
				sum+=1.0;
			}
		}
		for (int i=0; i<freq.length; ++i) {
			freq[i]/=sum;
		}
		charDist = freq;
	}

	/** Estimate distribution from a given list of texts. */
	public IIDTextModel(Alphabet alphabet, List<String> background) {
		this.alphabetSize = alphabet.size();
		double[] freq = new double[alphabetSize];
		long n = 0; 
		for (String s : background) {
			for (int i=0; i<s.length(); ++i) {
				freq[alphabet.getIndex(s.charAt(i))]+=1;
			}
			n+=s.length();
		}
		for (int i=0; i<freq.length; ++i) {
			freq[i]/=(double)n;
		}
		charDist = freq;
	}

	/** Estimate distribution from a given list of texts. */
	public IIDTextModel(int alphabetSize, List<int[]> background) {
		this.alphabetSize = alphabetSize;
		double[] freq = new double[alphabetSize];
		long n = 0; 
		for (int[] s : background) {
			for (int c : s) {
				if (c>=0) {
					freq[c]+=1;
					n+=1;
				}
			}
		}
		for (int i=0; i<freq.length; ++i) {
			freq[i]/=(double)n;
		}
		charDist = freq;
	}

	@Override
	public FiniteMemoryTextModel reverseTextModel() {
		return this;
	}

	@Override
	public int[] generateRandomText(int length) {
		int[] s = new int[length];
		Random random = new Random();
		for (int i=0; i<length; ++i) {
			double p = random.nextDouble();
			int c = 0;
			while ((p>charDist[c]) && (c<alphabetSize)) {
				p-=charDist[c];
				c+=1;
			}
			s[i] = c;
		}
		return s;
	}

	public double[] getCharacterDistribution() {
		return charDist;
	}

	public int getOrder() { return 0; }

	@Override
	public int getStateCount() {
		return 1;
	}

	@Override
	public double getProbability(int sourceState, int character) {
		return charDist[character];
	}

	@Override
	public double getProbability(int sourceState, int character, int targetState) {
		if ((sourceState!=0) || (targetState!=0)) throw new IndexOutOfBoundsException("Illegal state index.");
		if ((character<0) || (character>=alphabetSize)) throw new IndexOutOfBoundsException("Illegal character.");
		return charDist[character];
	}

	@Override
	public int[] getTransitionTargets(int sourceState, int character) {
		if (sourceState!=0) throw new IndexOutOfBoundsException("Illegal state index.");
		if ((character<0) || (character>=alphabetSize)) throw new IndexOutOfBoundsException("Illegal character.");
		int[] result = {0}; 
		return result;
	}

	@Override
	public double getEquilibriumProbability(int state) {
		if (state!=0) throw new IndexOutOfBoundsException("Illegal state index.");
		return 1.0;
	}

	@Override
	public int getAlphabetSize() {
		return alphabetSize;
	}

	public static IIDTextModel readFromFile(String filename, Alphabet alphabet) {
		FileInputStream alphabetFile = null;
		try {
			alphabetFile = new FileInputStream(filename);
		} catch (FileNotFoundException e) {
			Log.errorln("File with character distribution not found, sorry!");
			System.exit(1);
		}
		BufferedReader br = new BufferedReader(new InputStreamReader(alphabetFile));
		double[] dist = new double[alphabet.size()];
		try {
			while (true) {
				String line = br.readLine();
				if (line==null) break;
				StringTokenizer st = new StringTokenizer(line," \t",false);
				if (!st.hasMoreTokens()) continue;
				char c = st.nextToken().charAt(0);
				if (!st.hasMoreTokens()) continue;
				double p = Double.parseDouble(st.nextToken());
				if (alphabet.contains(c)) dist[alphabet.getIndex(c)]=p;
			}
		} catch (IOException e) {
			Log.errorln("I/O failure, sorry!");
			System.exit(1);
		}
		return new IIDTextModel(alphabet.size(), dist);
	}

	public void writeToFile(File file, Alphabet alphabet) {
		PrintWriter out = null;
		try {
			out = new PrintWriter(new BufferedWriter(new FileWriter(file)));
		} catch (IOException e) {
			Log.errorln("Sorry, could not write character distribution!");
			Log.printException(e);
			System.exit(1);
		}
		for (int i = 0; i < this.charDist.length; ++i) {
			out.printf("%s\t%f%n",alphabet.get(i),this.charDist[i]);
		}
		out.close();
	}

	@Override
	public String toString() {
		StringBuffer sb = new StringBuffer();
		for (int c=0; c<alphabetSize; ++c) {
			if (c>0) sb.append("; ");
			sb.append(String.format("%d:%f",c,charDist[c]));
		}
		return sb.toString();
	}

	@Override
	public int order() {
		return 0;
	}

}
